clear
set more off, perm
cd /Users/zachbrown/Projects/PriceTransparency/Data/

// Use medical claims
global year_min 2005
global year_max 2014
global keep_vars "record_id member_key bill_prov_key sex serv_prov_key serv_prov_cw_key proc_code amt* from_date to_date year age_eom ub_facility ub_bill_class adm_type member_zip claim_status_orig rev_maj"

forval yr = $year_min(1)$year_max {
		disp "year: `yr'"
		tempfile tmpdata
		shell nice gunzip Raw/med_clm/med_clm_`yr'_clean.dta -c > `tmpdata'
		append using `tmpdata', keep($keep_vars)
	}


// Make date
drop year
gen year = year(from_date)
gen ym = ym(year,month(from_date))
format ym %tm

// Choose date range
drop if ym>ym(2012,2)

// Sex 
gen male = (sex==1)
label var male "Male"
drop sex

// Drop those that might have medicare
drop if age >= 65

// Drop claims that are not real
drop if claim_status_orig==4 // drop denied claims
drop if claim_status_orig==22 // drop reversed claims


// Label members that never went to a provider in NH
rename serv_prov_key prov_key
merge m:1 prov_key using build/provider_build.dta, keepusing(prov_in_nh prov_clinic_zip individual prov_type)
drop if _merge==2
drop _merge
rename prov_key serv_prov_key
rename prov_clinic_zip serv_prov_zip

gen prov_in_nh_tmp = (prov_in_nh==1)
bys member_key year: egen member_ever_nh = max(prov_in_nh_tmp)
gen member_never_nh=~member_ever_nh
drop member_ever_nh prov_in_nh_tmp
tab member_never_nh, miss
label var member_never_nh "Member never visited NH provider during year"
rename prov_in_nh serv_prov_in_nh

// Define visit
global visit_vars "member_key from_date"
sort $visit_vars

// Drop if missing CPT
drop if proc_code==""

// Drop if inpatient
label list ub_bill_class
drop if inlist(ub_bill_class,1,2)

// Merge on radiology CPT codes
merge m:1 proc_code using build/cms_cpt_codes_radio.dta
drop if _merge==2

gen proc_radiology = (_merge==3)
drop _merge
rename on_web proc_on_web

// Keep radiology visits only
sort $visit_vars
by $visit_vars : egen visit_radiology = max(proc_radiology)
tab visit_radiology, miss
keep if visit_radiology==1
drop visit_radiology

tab proc_radiology proc_on_web, miss


// Make indicator for treated
by $visit_vars : egen visit_on_web = max(proc_on_web)
tab visit_on_web, miss


// Find procedure code for most expensive radiology procedure within visit
gen amt_billed_radio = amt_billed if proc_radiology==1
gen amt_oop_radio = amt_member_oop if proc_radiology==1
gen amt_paid_radio = amt_paid if proc_radiology==1

by $visit_vars : egen amt_billed_radio_max = max(amt_billed_radio)
gen is_radio_max = (amt_billed_radio==amt_billed_radio_max) & proc_radiology
gen proc_class_max = proc_class if is_radio_max==1

gen amt_oop_radio_max = amt_member_oop if is_radio_max==1
gen amt_paid_radio_max = amt_paid if is_radio_max==1

tab is_radio_max proc_radiology, miss

gen proc_id_radio = proc_id_1 if proc_radiology & inrange(ym,start_ym_1,end_ym_1) & start_ym_1~=.
replace proc_id_radio = proc_id_2 if proc_radiology & inrange(ym,start_ym_2,end_ym_2) & start_ym_2~=.
replace proc_id_radio = proc_id_3 if proc_radiology & inrange(ym,start_ym_3,end_ym_3) & start_ym_3~=.
replace proc_id_radio = proc_id_4 if proc_radiology & inrange(ym,start_ym_4,end_ym_4) & start_ym_4~=.
replace proc_id_radio = proc_id_5 if proc_radiology & inrange(ym,start_ym_5,end_ym_5) & start_ym_5~=.

gen proc_id_radio_max = proc_id_radio if is_radio_max==1


// Merge on proc_id2
merge m:1 proc_code using build/proc_code_xwalk.dta
drop if _merge==2
drop _merge

count if is_radio_max & proc_id_radio==.
count if proc_radiology & proc_id_radio==.

drop amt_billed_radio amt_billed_radio_max
tab visit_on_web, miss

// Find service provider CW key for most expensive radiology procedure within visit
rename serv_prov_key serv_prov_key_all
gen serv_prov_key = serv_prov_key_all if is_radio_max==1

rename bill_prov_key bill_prov_key_all
gen bill_prov_key = bill_prov_key_all if is_radio_max==1


// Find service provider by individual or not individual
gen serv_ind_cw_tmp = serv_prov_cw_key if individual==1
gen serv_noind_cw_tmp = serv_prov_cw_key if individual==0
gen serv_unkind_cw_tmp = serv_prov_cw_key if individual==.

gen serv_facility_cw_tmp = serv_prov_cw_key if prov_type==1
gen serv_group_cw_tmp = serv_prov_cw_key if prov_type==2
gen serv_prac_cw_tmp = serv_prov_cw_key if prov_type==4
gen serv_unktype_cw_tmp = serv_prov_cw_key if prov_type==.


bys $visit_vars : egen serv_ind_cw = mode(serv_ind_cw_tmp), minmode
bys $visit_vars : egen serv_noind_cw = mode(serv_noind_cw_tmp), minmode
bys $visit_vars : egen serv_unkind_cw = mode(serv_unkind_cw_tmp), minmode

bys $visit_vars : egen serv_facility_cw = mode(serv_facility_cw_tmp), minmode
bys $visit_vars : egen serv_group_cw = mode(serv_group_cw_tmp), minmode
bys $visit_vars : egen serv_prac_cw = mode(serv_prac_cw_tmp), minmode
bys $visit_vars : egen serv_unktype_cw = mode(serv_unktype_cw_tmp), minmode

mdesc serv_ind_cw serv_noind_cw serv_unkind_cw serv_facility_cw serv_group_cw serv_prac_cw serv_unktype_cw

drop serv_ind_cw_tmp serv_noind_cw_tmp serv_unkind_cw_tmp serv_facility_cw_tmp serv_group_cw_tmp serv_prac_cw_tmp serv_unktype_cw_tmp

// Merge on provider crosswalk and find tax id, NPI
rename serv_prov_key_all prov_key
merge m:1 prov_key using build/prov_cw_crosswalk.dta, keepusing(prvtax_id npi_id)
drop if _merge==2
drop _merge
rename prov_key serv_prov_key_all

gen serv_noradio_prvtax_id = prvtax_id if is_radio_max==0
gen serv_noradio_npi_id = npi_id if is_radio_max==0
drop prvtax_id npi_id


// Is radiology proc most expensive in visit?
bys $visit_vars : egen max_bill = max(amt_billed)
gen is_max_bill_radio = (amt_billed==max_bill) & proc_radiology==1
bys $visit_vars : egen max_bill_is_radio = max(is_max_bill_radio)
drop is_max_bill_radio max_bill


// Label primary claims
gen primary_ins = (claim_status_orig==1)


// Label emergencies
label list adm_type
bys $visit_vars : egen adm_type2 = mode(adm_type), minmode
gen proc_emerg = (adm_type2==1 | inlist(rev_maj,450,451,452,456,459))


// Make overnight variable
gen overnight = (to_date>from_date) & to_date~=.
drop to_date


// Find mode of member_zip variables
foreach var of varlist member_zip primary_ins serv_prov_zip serv_noradio_prvtax_id serv_noradio_npi_id {
	disp "Var: `var'"
	rename `var' tmp_`var'
	by $visit_vars : egen `var'=mode(tmp_`var'), minmode
	drop tmp_`var'
}

// Find mode of provider/facility/member_zip variables 
foreach var of varlist ub_facility ub_bill_class member_zip {
	disp "Var: `var'"
	rename `var' tmp_`var'
	by $visit_vars : egen `var'=mode(tmp_`var'), missing
	drop tmp_`var'
}

// Drop if visit contains rare procedures
bys proc_code: egen n_proc_unique = count(from_date)
bys $visit_vars : egen n_proc_unique_min = min(n_proc_unique)
drop n_proc_unique




// Collapse to visit level
gen n_procs=1
gen n_procs_radio = (proc_radiology==1)
gen n_procs_on_web= (proc_on_web==1)
gen amt_billed_radio = amt_billed if proc_radiology==1
gen amt_billed_radio_max = amt_billed if is_radio_max==1
gen amt_billed_on_web = amt_billed if proc_on_web==1
*summ amt_billed_radio amt_billed_radio_max

// Output proc level dataset
keep visit_on_web serv_prov_in_nh member_never_nh proc_emerg* overnight  /// 
	n_procs n_procs_radio n_procs_on_web amt_* max_bill_is_radio ///
	record_id proc_id_radio proc_id_radio_max serv_prov_zip ///
	ub_facility ub_bill_class primary_ins ///
	age male member_zip n_proc_unique_min proc_class_max  ///
	proc_on_web on_web_procname proc_web_name_id proc_radiology is_radio_max ///
	bill_prov_key bill_prov_key_all serv_prov_key serv_prov_key_all ///
	serv_ind_cw serv_noind_cw serv_unkind_cw serv_facility_cw serv_group_cw serv_prac_cw serv_unktype_cw ///
	serv_noradio_prvtax_id serv_noradio_npi_id ///
	$visit_vars ym year proc_id2 

// Use record id and proc name for most expensive radiology procedure within visit 
gen long rec_id_radio_tmp = record_id if is_radio_max==1
bys $visit_vars: egen long rec_id_radio = min(rec_id_radio_tmp)
drop rec_id_radio_tmp

compress	
if $use_sample==1 gsave build/proc_clms_1_sample.dta, replace
if $use_sample==2 gsave build/proc_clms_1_sample2.dta, replace
if $use_sample==0 gsave build/proc_clms_1.dta, replace

replace on_web_procname = . if is_radio_max~=1
replace proc_web_name_id = . if is_radio_max~=1
replace border_proc = . if is_radio_max~=1

collapse (mean) visit_on_web serv_prov_in_nh amt_billed_radio_mean=amt_billed_radio /// 
	(max) member_never_nh proc_emerg* overnight /// 
	(sum) n_procs n_procs_radio n_procs_on_web amt_*  ///
	(firstnm) rec_id_radio proc_id_radio_max proc_id2_rad serv_prov_zip ///
	ub_facility ub_bill_class primary_ins ///
	bill_prov_key serv_prov_key serv_ind_cw serv_noind_cw serv_unkind_cw serv_facility_cw serv_group_cw serv_prac_cw serv_unktype_cw ///
	serv_noradio_prvtax_id serv_noradio_npi_id ///
	age male member_zip n_proc_unique_min proc_class_max on_web_procname proc_web_name_id max_bill_is_radio ///
	, by($visit_vars ym year) fast

tab visit_on_web, miss

// Drop when procedure is very rare
sort proc_id_radio
by proc_id_radio: egen n_proc_tot = count(amt_member_oop)
drop if n_proc_tot<=1
drop n_proc_tot


// Apply value labels
label value ub_facility ub_facility
label value ub_bill_class ub_bill_class
label value proc_class_max proc_class
label value on_web_procname procname
label value proc_web_name_id proc_web_name_id

// Apply variable labels
label var age "Age"
label var male "Male"

sort $visit_vars
order rec_id_radio
compress

// Output 
gsave build/radiology_visit_1.dta, replace

